In [11]:
import pandas as pd
import datetime
from datetime import date,timedelta
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default ='plotly_white'
In [2]:
data_control = pd.read_csv("control_group.csv",sep=';')
data_test = pd.read_csv("test_group.csv",sep=';')
In [3]:
# data_control.head()
# data_test.head()
In [4]:
data_control.columns = ["Campaign Name", "Date", "Amount Spent", 
                        "Number of Impressions", "Reach", "Website Clicks", 
                        "Searches Received", "Content Viewed", "Added to Cart",
                        "Purchases"]

data_test.columns = ["Campaign Name", "Date", "Amount Spent", 
                        "Number of Impressions", "Reach", "Website Clicks", 
                        "Searches Received", "Content Viewed", "Added to Cart",
                        "Purchases"]
In [5]:
data_control.head()
Out[5]:
Campaign Name Date Amount Spent Number of Impressions Reach Website Clicks Searches Received Content Viewed Added to Cart Purchases
0 Control Campaign 1.08.2019 2280 82702.0 56930.0 7016.0 2290.0 2159.0 1819.0 618.0
1 Control Campaign 2.08.2019 1757 121040.0 102513.0 8110.0 2033.0 1841.0 1219.0 511.0
2 Control Campaign 3.08.2019 2343 131711.0 110862.0 6508.0 1737.0 1549.0 1134.0 372.0
3 Control Campaign 4.08.2019 1940 72878.0 61235.0 3065.0 1042.0 982.0 1183.0 340.0
4 Control Campaign 5.08.2019 1835 NaN NaN NaN NaN NaN NaN NaN
In [6]:
data_test.head()
Out[6]:
Campaign Name Date Amount Spent Number of Impressions Reach Website Clicks Searches Received Content Viewed Added to Cart Purchases
0 Test Campaign 1.08.2019 3008 39550 35820 3038 1946 1069 894 255
1 Test Campaign 2.08.2019 2542 100719 91236 4657 2359 1548 879 677
2 Test Campaign 3.08.2019 2365 70263 45198 7885 2572 2367 1268 578
3 Test Campaign 4.08.2019 2710 78451 25937 4216 2216 1437 566 340
4 Test Campaign 5.08.2019 2297 114295 95138 5863 2106 858 956 768
In [7]:
data_control.isnull().sum()
Out[7]:
Campaign Name            0
Date                     0
Amount Spent             0
Number of Impressions    1
Reach                    1
Website Clicks           1
Searches Received        1
Content Viewed           1
Added to Cart            1
Purchases                1
dtype: int64
In [8]:
# for i in data_control.columns:
#     data_control[i].fillna(value=data_control[i].mean(), inplace=True)    
data_control["Number of Impressions"].fillna(value=data_control["Number of Impressions"].mean(), inplace=True)
data_control["Reach"].fillna(value=data_control["Reach"].mean(), 
                             inplace=True)
data_control["Website Clicks"].fillna(value=data_control["Website Clicks"].mean(), 
                                      inplace=True)
data_control["Searches Received"].fillna(value=data_control["Searches Received"].mean(), 
                                         inplace=True)
data_control["Content Viewed"].fillna(value=data_control["Content Viewed"].mean(), 
                                      inplace=True)
data_control["Added to Cart"].fillna(value=data_control["Added to Cart"].mean(), 
                                     inplace=True)
data_control["Purchases"].fillna(value=data_control["Purchases"].mean(), 
                                 inplace=True)
In [9]:
data = data_control.merge(data_test,how='outer').sort_values(["Date"])
data = data.reset_index(drop=True)
data.head()
C:\Users\kevin\anaconda3\lib\site-packages\pandas\core\reshape\merge.py:1207: UserWarning: You are merging on int and float columns where the float values are not equal to their int representation.
  warnings.warn(
Out[9]:
Campaign Name Date Amount Spent Number of Impressions Reach Website Clicks Searches Received Content Viewed Added to Cart Purchases
0 Control Campaign 1.08.2019 2280 82702.0 56930.0 7016.0 2290.0 2159.0 1819.0 618.0
1 Test Campaign 1.08.2019 3008 39550.0 35820.0 3038.0 1946.0 1069.0 894.0 255.0
2 Test Campaign 10.08.2019 2790 95054.0 79632.0 8125.0 2312.0 1804.0 424.0 275.0
3 Control Campaign 10.08.2019 2149 117624.0 91257.0 2277.0 2475.0 1984.0 1629.0 734.0
4 Test Campaign 11.08.2019 2420 83633.0 71286.0 3750.0 2893.0 2617.0 1075.0 668.0
In [ ]:
 
In [10]:
figure = px.scatter(data_frame = data,x='Number of Impressions',y='Amount Spent',size='Amount Spent',color='Campaign Name',trendline='ols')
figure
In [ ]: